# Author: Darren Colby
# Date: 6/8/2021
# Purpose: To create a usable dataset for network analysis of Mexican cartel
# fragmentation after the capture of Joaquin Guzman
# Data sources: Events: ACLED, UCDP GED, SCAD; Cocaine prices: UNODC


# Imports and reading the data --------------------------------------------

library(tidyverse)
library(lubridate)
library(abjutils)

# Load the raw event data. The steps below use the columns `date`, `city`,
# `sideA` and `sideB`; any other columns are dropped during deduplication.
drugnet <- read_csv(file = "data/input_data.csv")

# Make consistent city and actor names ------------------------------------

# Lowercase a character vector and strip accents so that spelling variants of
# the same entity compare equal.
normalise_name <- function(x) {
   x %>%
      str_to_lower() %>%
      rm_accent()
}

# Replace each element of `x` that appears in `names(lookup)` with the
# corresponding lookup value. Elements without an entry, including NA, are
# returned unchanged.
recode_with <- function(x, lookup) {
   coalesce(unname(lookup[x]), x)
}

# Canonical city names, keyed by the (already normalised) variant spelling.
# Every target is lowercase and unaccented so it matches the normalised data;
# city is only used as a deduplication key, so variants of one place must map
# to exactly the same string.
city_lookup <- c(
   "miguel aleman municipality" = "ciudad miguel aleman",
   "victoria municipality" = "ciudad victoria",
   "camargo municipality" = "ciudad camargo",
   "juarez municipality" = "ciudad juarez",
   "villa lopez municipality" = "lopez municipality",
   "monterrey municipality" = "monterrey",
   "san pedro garza garcia" = "san pedro garza garcia municipality",
   "madera municipality" = "madera",
   "guerrero" = "praxedis g. guerrero municipality",
   "garcia" = "san pedro garza garcia municipality",
   "salinas victoria municipality" = "ciudad victoria",
   "juan aldama" = "juan aldama municipality",
   "aldama municipality" = "juan aldama municipality",
   "guadalajara municipality" = "guadalajara",
   "pueblo nuevo solistahuacan municipality" = "pueblo nuevo solistahuacan",
   "veracruz municipality" = "veracruz"
)

# Canonical actor names, keyed by the (already normalised) variant spelling.
# The same table is applied to both sides of an event so that an actor gets
# one name regardless of which column it appears in.
actor_lookup <- c(
   "cjng: jalisco new generation cartel" = "cartel jalisco nueva generacion",
   "the gulf gang" = "gulf cartel",
   "chamula communal militia (mexico)" = "chamula militia",
   "chamula army" = "chamula militia",
   "cdn: cartel del noreste" = "los zetas - cartel del noreste faction",
   "los zetas gang" = "los zetas",
   "gente nueva gang" = "gente nueva cartel",
   "zeta cartel inmates" = "los zetas",
   "la familia michoacana cartel" = "la familia michoacan cartel",
   "zetas" = "los zetas",
   "zetas drug cartel" = "los zetas",
   "jalisco new generation cartel" = "cartel jalisco nueva generacion",
   "fusdeg self defense group" = "faction of front for security and development vigilante group",
   "la linea drug gang" = "la linea gang",
   "los sinaloas gang" = "sinaloa cartel",
   "sinaloa cartel gunmen" = "sinaloa cartel",
   "jalisco nueva generacion gang" = "cartel jalisco nueva generacion",
   "jalisco cartel new generation" = "cartel jalisco nueva generacion",
   "upoeg vigilante group" = "upoeg self defense group",
   "beltran leyva gunmen" = "beltran leyva cartel",
   "gulf cartel inmates" = "gulf cartel",
   "fusdeg vigilante group" = "faction of front for security and development vigilante group",
   "la linea" = "la linea gang",
   "la familia" = "la familia michoacan cartel",
   "los zetas vieja escuela gang" = "los zetas - old school zetas faction"
)

drugnet <- drugnet %>%

   # Normalise case/accents, then collapse known spelling variants
   mutate(
      city = recode_with(normalise_name(city), city_lookup),
      sideA = recode_with(normalise_name(sideA), actor_lookup),
      sideB = recode_with(normalise_name(sideB), actor_lookup)
   ) %>%

   # Deduplicate events: the same date, city and pair of actors reported by
   # more than one source counts once. Only these four columns are kept.
   distinct(date, city, sideA, sideB) %>%

   # City was only needed as a deduplication key
   select(-city)

# Convert date to year ----------------------------------------------------

# Parse the month/day/year date strings, keep only the calendar year (as a
# double), put it in the first column and drop the original date.
drugnet <- drugnet %>%
   mutate(year = as.numeric(year(mdy(date)))) %>%
   select(-date) %>%
   relocate(year)

# Create weights for each tie based on the number of attacks --------------

# Tie weight = number of rows (events) sharing the same ordered pair of
# actors; every row of a pair carries the same weight.
drugnet <- drugnet %>%
   add_count(sideA, sideB, name = "weight")

# Make variable for aggressiveness -----------------------------------------

# Aggression of an actor = natural log of the summed weights over all rows in
# which it appears as sideA.
# NOTE(review): `weight` is repeated on every event row of a tie, so this sum
# counts each tie once per event (i.e. sums squared pair counts) -- confirm
# that this is intended rather than a sum over distinct ties.
# The column name `agression` (sic) is kept so downstream consumers of the
# output file are unaffected.
drugnet <- drugnet %>%
   group_by(sideA) %>%
   mutate(agression = weight %>% sum() %>% log()) %>%
   ungroup()

# Write to a csv file -----------------------------------------------------

# Create the output directory first so the write does not fail on a fresh
# checkout where "output/" does not exist yet; a no-op if it already exists.
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write_csv(drugnet, "output/drugnet.csv")
